{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Move the working directory one level up (presumably to the project root, so the\n",
    "# project-local packages imported in the following cells resolve - confirm).\n",
    "# Guarded on the `evaluation` package directory, the first project package imported below,\n",
    "# so that re-running this cell does not climb one more directory each time.\n",
    "if not os.path.isdir('evaluation'):\n",
    "    os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/home/njuciairs/wangshuai/test/FinancialNagetiveEntityJudge'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: display the working directory the rest of the notebook runs from.\n",
    "os.getcwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from evaluation.evaluate import evaluate\n",
    "from data_utils.basic_data import load_train_val_dataset,load_basic_dataset\n",
    "from results_process.regulizer import remove_nine,remove_short_entity\n",
    "from results_process.utils import load_model_rs\n",
    "from results_process.bert_entity_model import reduce_rs_by_id\n",
    "from functools import reduce\n",
    "import numpy as np\n",
    "from data_utils.bert_multi_class_data import get_train_val_data_loader, get_test_loader,TestEntityDataset\n",
    "import pandas as pd\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the stored results of one run (version 3) of the 'multi_class_cross1' model.\n",
    "# NOTE(review): `raw_df` is reassigned from the ensemble vote further down without being\n",
    "# read in between (in the visible cells), so this load looks redundant - confirm.\n",
    "raw_df = load_model_rs(model_name='multi_class_cross1',version_id=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stored results of versions 1..9 of the same model, one frame per version.\n",
    "dfs = [\n",
    "    load_model_rs(model_name='multi_class_cross1', version_id=version)\n",
    "    for version in range(1, 10)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test split and wrap it in the entity-level dataset.\n",
    "# NOTE(review): max_len=400 presumably has to match the setting used when the stored\n",
    "# predictions were produced - confirm.\n",
    "test_df = load_basic_dataset(split='test')\n",
    "test_dataset = TestEntityDataset(test_df, max_len=400)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Recover the candidate entity of every test sample from its input text: keep what\n",
    "# precedes the first '[SEP]' and drop the first 5 characters of that prefix\n",
    "# (presumably a leading '[CLS]' marker - confirm against TestEntityDataset).\n",
    "texts = [item.text for item in test_dataset]\n",
    "entity = [text.partition('[SEP]')[0][5:] for text in texts]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the recovered entity strings to every run's frame (mutates the frames in `dfs` in place).\n",
    "# NOTE(review): the assignment is positional, so it assumes each frame has one row per\n",
    "# test_dataset sample, in the same order - confirm.\n",
    "for df in dfs:\n",
    "    df['key_entity'] = entity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the rows of all runs and group them per (document id, candidate entity),\n",
    "# so each group holds the rows every run produced for the same pair.\n",
    "group = pd.concat(dfs).groupby(['id','key_entity'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "大学生贷 Counter({1: 8, 2: 1})\n",
      "花呗 Counter({1: 5, 0: 2, 2: 2})\n",
      "蚂蚁金服 Counter({1: 8, 0: 1})\n"
     ]
    }
   ],
   "source": [
    "# Collect the samples the runs disagree on: (id, entity) pairs that received more than one distinct label.\n",
    "# The loop variable is `doc_id` rather than `id` so the builtin `id` is not shadowed\n",
    "# at notebook scope for the rest of the kernel session.\n",
    "hesitate_samples = []\n",
    "for (doc_id, key), df in group:\n",
    "    count = Counter(df['predict_labels'])\n",
    "    if len(count) > 1:\n",
    "        # 4th field: the `entity` cell of the first test_df row with this id, as a 1-element array\n",
    "        hesitate_samples.append((doc_id, key, count, test_df[test_df['id'] == doc_id][['entity']].values[0]))\n",
    "    # Spot check: print the per-entity vote counts of one document\n",
    "    if doc_id == 'e9abc6b0':\n",
    "        print(key, count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1318"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of (id, entity) pairs on which the runs disagree.\n",
    "len(hesitate_samples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "65"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop the hesitant samples whose key is nested inside a longer entity of the same document.\n",
    "#\n",
    "# The previous version tested `key in e` against the raw `entity` cell as a whole, so a key\n",
    "# from any multi-entity cell was always treated as nested and discarded, and a sample could be\n",
    "# appended once per element. Here the cell is split into individual names first and each\n",
    "# sample is kept at most once.\n",
    "# NOTE(review): assumes the `entity` column joins names with ';' (the same separator used\n",
    "# for the key_entity output below) - confirm against the dataset.\n",
    "hesitate_long_rmed = []\n",
    "for hesitate in hesitate_samples:\n",
    "    _, key, _, entities = hesitate\n",
    "    # str() turns a missing cell (NaN) into 'nan', matching the key derived for such rows\n",
    "    names = [name for cell in entities for name in str(cell).split(';')]\n",
    "    has_longer_entity = any(key != name and key in name for name in names)\n",
    "    if not has_longer_entity:\n",
    "        hesitate_long_rmed.append(hesitate)\n",
    "len(hesitate_long_rmed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('054b8412', '你我金融', Counter({0: 5, 2: 4}), array(['你我金融'], dtype=object))\n",
      "[[nan '3、被起诉至法院你我金融绝不轻易姑息有严重逾期行为的借款人' '你我金融']]\n",
      "==========================\n",
      "('06cf507c', 'paypal', Counter({2: 6, 0: 2, 1: 1}), array(['paypal'], dtype=object))\n",
      "[[nan 'palantir的大部分工作自然需要保密，但众所周知的是，该公司发现可疑欺诈活动的模式和数据异常技术源自在线支付服务paypal'\n",
      "  'paypal']]\n",
      "==========================\n",
      "('0c94dd90', '普惠家', Counter({0: 8, 2: 1}), array(['普惠家'], dtype=object))\n",
      "[['普惠家最新消息追踪 互金跟投第0425期普惠家复盘'\n",
      "  ' 互金跟投(51hjgt.com)4月26日复盘：考虑到目前P2P网贷行业风雨多变，对于粉丝们重点关注的普惠家最新消息，互金跟投将会持续关注并对平台进行定期复盘，第一时间与大家分享。下文是普惠家本周最新复盘。点此加入普惠家最新消息互通群，了解详情>> {IMG:1} 0 1媒体报道 据普惠家官微报道，《网贷行业备案将至，普惠家全力备战》。 报道称，2018年上半年，普惠家紧锣密鼓针对整改验收做出了进一步具体、详细的部署。 目前，普惠家各项合规备案工作正有条不紊快速推进，通过不断提高的合规资质、透明的资产、严谨的风控保障等关键要素取得备案优势。0 2平台公告 互金跟投小编调研发现，平台官网每天都在公布还款公告，具体的出借人可以在官网上看到。 {IMG:2} {IMG:3} 关注互金跟投公众号，查看更多内容。 互金跟投公众号重点复盘行业Top100家平台，如大家想查看平台复盘，可以进入对应平台交流群，群里你可以定期获得平台的周期复盘内容。 更多内容提供包括→ 1、284家P2P最新回款进度 2、590家合规备案进度 3、2019年237家P2P预警名单 4、2400+家P2P平台舆情追踪 5、互金跟投将持续对普惠家进行消息复盘，你可以加入普惠家交流圈，可收到关于普惠家的定期消息复盘。点此加入普惠家最新消息互通群，了解详情>>'\n",
      "  '普惠家']]\n",
      "==========================\n",
      "('0d21f2cb', '新华财富', Counter({1: 8, 2: 1}), array(['新华财富'], dtype=object))\n",
      "[[nan\n",
      "  '发表了博文《新华财富资产管理有限公司涉嫌项目造假及虚假宣传4.9亿私募基金产品难兑付》2cR帝都网-多度网2cR帝都网-多度网2cR帝都网-多度网2cR帝都网-多度网2cR帝都网-多度网2cR帝都网-http://t.cn/Ruz1qvb??'\n",
      "  '新华财富']]\n",
      "==========================\n",
      "('0dc42a68', '江西银行', Counter({0: 8, 1: 1}), array(['江西银行'], dtype=object))\n",
      "[['江西银行婺源支行惠农信贷通确认放款名单'\n",
      "  ' 您所在的位置：首页 >> 资讯 >> 通知公告 >>江西银行婺源支行惠农信贷通确认放款名单公示 发布时间：  2018-11-08 ????来源：  江西银行婺源县支行??中 小】 字号：【 大 根据饶办发（2014）26号上饶市委办公厅、市政府办公厅关于印发《上饶市财政惠农信贷通融资试点工作方案》文件要求，现就婺源县农工部、江西银行婺源县支行审批通过惠农信贷通贷款人员名单公示如下； 序号 ???名 ?称 ?法 人 ?电 ?话 ?经营范围 核实后拟授信额度 ??1 婺源县清华茶叶种植户 黄彤 13803592867 茶叶种植、销售 200万元 ??2 在公示期内，如何单位和个人都可通过来电和来访形式，反映公示对象在信誉和真实性等方面存在的问题，公示时间：从2018年10月9日到2018年10月11日止，共三天。 投诉电话；婺源县委农工部；0793-7358931.江西银行婺源县支行；0793-7259822.'\n",
      "  '江西银行']]\n",
      "==========================\n",
      "('11b63dff', '信融投资', Counter({0: 8, 2: 1}), array(['信融投资'], dtype=object))\n",
      "[['我在信融投资/4年安全老平台/包商银行存管合作/每周向监管汇报&ChannelValue=app_meizu ?'\n",
      "  '我在信融投资/4年安全老平台/包商银行存管合作/每周向监管汇报&ChannelValue=app_meizu ?' '信融投资']]\n",
      "==========================\n",
      "('1694dc1c', '盛付通', Counter({0: 7, 1: 2}), array(['盛付通'], dtype=object))\n",
      "[[nan '17' '盛付通']]\n",
      "==========================\n",
      "('1976b67f', '币安', Counter({0: 6, 2: 3}), array(['币安'], dtype=object))\n",
      "[[nan '这得益于2017年9月，中国监管部门正式将ICO定为非法融资之后，币安迅速转向海外，避开了雷区' '币安']]\n",
      "==========================\n",
      "('1d45f97c', '民生银行', Counter({0: 8, 2: 1}), array(['民生银行'], dtype=object))\n",
      "[['民生银行推出微贷产品：零等待、零抵押、零担保'\n",
      "  '精彩热图 {IMG:2} 揭秘明星婚姻现状 {IMG:3} 网络女神真照坑爹 {IMG:4} 星2代萌化观众 {IMG:5} 凌潇肃欧洲蜜月照 {IMG:6} 那些年 我们一起备战过的考研 {IMG:7} 萌娃闹大本营 {IMG:8} 青岛一经适房墙体上画假窗 {IMG:9} 鱿鱼西施迷倒食客 ... 今日推荐 {IMG:10} 尼泊尔老翁身高仅41厘米 {IMG:11} 印度男子胡须长4.3米创纪录 搞笑动物万圣节 南瓜诱惑难抵挡脸谱网可预测恋人分手时间 共同朋友圈是关键 2013-10-30 16:30美雕刻家打造南瓜版鬼怪迎接万圣节? 2013-10-30 14:17离婚率高达40%！挪威政府敦促夫妻多约会 2013-10-30 11:14印度民众积极准备迎接卡莉女神庆典（组图） 2013-10-30 07:48打赌尘封糖果25年 两兄弟被赞应获搞笑诺贝尔奖 2013-10-29 19:33澳大利亚维州采取强硬措施整治司机开车用手机 2013-10-29 17:01 ...'\n",
      "  '民生银行']]\n",
      "==========================\n",
      "('23acf103', '普惠金融', Counter({1: 8, 2: 1}), array(['普惠金融'], dtype=object))\n",
      "[[nan\n",
      "  '《无良理财平台冲击金融风险底线，600多万受害人投诉无门》多盈财富号称中国领先的直销银行平台，其控股人周之峰打着响应国家发展普惠金融号召的旗帜，非法吸收公众财产非法自融，大量投资项目逾期，导致来自全国各地的600多万投资者投资到期无法兑付'\n",
      "  '普惠金融']]\n",
      "==========================\n",
      "('2832f199', '金评媒', Counter({0: 7, 1: 2}), array(['金评媒'], dtype=object))\n",
      "[[nan\n",
      "  '(金评媒)2.;长沙要求未纳入整治范围P2P机构报送业务开展情况;;;6日，长沙市非法集资专项整治工作联席会议专门工作办公室发布《关于报送P2P网贷业务开展情况的通知》'\n",
      "  '金评媒']]\n",
      "==========================\n",
      "('29cf4db3', '联璧金融', Counter({2: 7, 1: 2}), array(['联璧金融'], dtype=object))\n",
      "[[nan '#斐讯0元购诱导非法吸储集资#@CCTV焦点访谈@半岛网@斐讯-顾国平@联璧金融' '联璧金融']]\n",
      "==========================\n",
      "('2b11a1c2', 'plustoken', Counter({2: 6, 0: 3}), array(['plustoken'], dtype=object))\n",
      "[[nan\n",
      "  '<br>其最大风险是跑路，因为其智能狗模式意味着把自己的加密资产财富控制权交给plustoken，但你如果需要通过智能狗获利，并且希望发展自己的团队，这个又是必选，这是plustoken第一发展阶段的核心价值，也是众多模仿者抄袭的地方，智能狗对于plustoken就类似于qq、微信对于腾讯游戏的重要性，没有qq、微信这样的社交流量口，腾讯游戏不可能只手遮天'\n",
      "  'plustoken']]\n",
      "==========================\n",
      "('2fe2c9e8', '红小宝', Counter({0: 8, 2: 1}), array(['红小宝'], dtype=object))\n",
      "[['红小宝最新消息追踪 互金跟投第0428期红小宝复盘'\n",
      "  ' 互金跟投(51hjgt.com)4月29日复盘：考虑到目前P2P网贷行业风雨多变，对于粉丝们重点关注的红小宝最新消息，互金跟投将会持续关注并对平台进行定期复盘，第一时间与大家分享。下文是红小宝本周最新复盘。点此加入红小宝最新消息互通群，了解详情>> {IMG:1} 临近五一，红小宝也发布了五一放假公告，有在车上的出借人可以了解一下。 {IMG:2} 互金跟投小编调研红小宝平台发现，截止到４月２７日，红小宝累积注册用户１１６３１７人，部分运营报告如下： {IMG:3} {IMG:4} 关注互金跟投公众号，查看更多内容。 互金跟投公众号重点复盘行业Top100家平台，如大家想查看平台复盘，可以进入对应平台交流群，群里你可以定期获得平台的周期复盘内容。 更多内容提供包括→ 1、284家P2P最新回款进度 2、590家合规备案进度 3、2019年237家P2P预警名单 4、2400+家P2P平台舆情追踪 5、互金跟投将持续对红小宝进行消息复盘，你可以加入红小宝交流圈，可收到关于红小宝的定期消息复盘。点此加入红小宝最新消息互通群，了解详情>>'\n",
      "  '红小宝']]\n",
      "==========================\n",
      "('3b2e4167', '每经网', Counter({0: 6, 1: 3}), array(['每经网'], dtype=object))\n",
      "[[nan\n",
      "  '[1]胡群.信用卡诈骗案呈上升趋势.北京首现透支禁止令[J/OL].每经网.[2011-5-10].http://www.zznews.gov.cn'\n",
      "  '每经网']]\n",
      "==========================\n",
      "('3f36475b', '小白财经', Counter({2: 6, 0: 2, 1: 1}), array(['小白财经'], dtype=object))\n",
      "[[nan '【小白财经】浙江特大假药诈骗案告破：靠朋友圈招揽生意涉案金额达1.5亿' '小白财经']]\n",
      "==========================\n",
      "('41873be6', '介贷网', Counter({2: 8, 0: 1}), array(['介贷网'], dtype=object))\n",
      "[[nan '关于介贷网有没有虚假标的的问题' '介贷网']]\n",
      "==========================\n",
      "('4383d929', '龙虎榜', Counter({0: 7, 2: 1, 1: 1}), array(['龙虎榜'], dtype=object))\n",
      "[[nan\n",
      "  '因涉嫌欺诈发行股票等违法行为，于近日被中国证监会移送公安机关的*金亚在连续跌停后今日开板，龙虎榜信息显示，一机构专用席位买入326万元'\n",
      "  '龙虎榜']]\n",
      "==========================\n",
      "('43894704', '中金投资', Counter({0: 8, 1: 1}), array(['中金投资'], dtype=object))\n",
      "[['中金投资退出史带财险获批 “史带系”持股超97%'\n",
      "  '每经记者袁园每经编辑卢九安 发布股权转让方案近一年后，史带财产(来看，随着车险业务的退出，2015年史带财险的 保费下降76.4%至2.5亿元，但后面开始逐步企稳，保费收入也进入了稳步发展期。 盈利方面，自从退出了车险业务，史带财险的净利润仅在2015年出现了小幅下滑，为2534万元，2016~2018年的净利润开始企稳回升，分别为8298万元、5970万元、5078万元。 进入2019年，史带财险的业绩也依旧稳定。其一季度偿付能力数据报告显示，截至一季度末，史带财险的综合偿付能力充足率和核心偿付能力充足率均为270.22%，最近一期风险评级为A，一季度实现保险业务收入2.64亿元，实现净利润1277万元。 史带财险表示，2019年一季度，公司持续不断从制度健全性和遵从有效性两方面强化风险管控。一是修订了公司《风险管理框架制度》，从制度层面对风险管理工作的开展进行更新和细化；二是明确了公司2019年度风险管理的目标和偏好，编制《2019年度风险偏好陈述书》，明确了风险容忍度和限额；三是对2019年公司预算开展了独立风险评估，对公司2019年公司预算的风险承受能力进行了独立测试，以此形成了更加完善、科学、有效的风险管理体系。 （责任编辑：李佳佳 HN153）'\n",
      "  '中金投资']]\n",
      "==========================\n",
      "('455eb5a4', '陆金所', Counter({0: 5, 2: 4}), array(['陆金所'], dtype=object))\n",
      "[['陆金所旗下P2P何去何从？深圳前金服引关注'\n",
      "  ' \\u3000\\u3000 【财新网】（记者 吴雨俭）近日，路透社报道，中国最大的在线财富管理平台陆金所计划退出P2P业务，并且已经开始着手申请消费金融公司牌照，准备转型。 \\u3000\\u3000对此，陆金所方面迅速做出回应，但既未确认，也未否认，仅表示：陆金服P2P业务正积极响应和配合监管三降要求；网贷业务正常运营，存量产品与客户权益不受影响。 \\u3000\\u3000部分业内人士分析，陆金所的转型计划尚未最终落定。一位深圳P2P机构人士告诉财新记者，因为P2P板块一直在影响陆金所的整体上市计划，所以高层一直在考虑是否要将其剥离。'\n",
      "  '陆金所']]\n",
      "==========================\n",
      "('4e3cda19', '中子星投资有限公司', Counter({2: 8, 0: 1}), array(['中子星投资有限公司'], dtype=object))\n",
      "[['中子星投资有限公司官网'\n",
      "  ' {IMG:1} 飞机场不哭！原来平胸的好处有这么多？！ {IMG:2} 萌妹子神乐坂真冬，大呼跳舞跳的有骚气才好玩 {IMG:3} 内涵图：真的不是小内内，请倒过来观看！ {IMG:4} 冷丫专栏：恋爱的感觉 1234 24小时热文 {IMG:5} 微商女友出轨健身教练，男友当场抓奸 {IMG:6} 漂亮妹子下乡干农活，这么贤惠的妹子哪里找 {IMG:7} 男人床上的真相！ {IMG:8} b罩杯有多大图片 胸罩罩杯尺寸说明表 {IMG:9} 衣服这么穿的吗？ {IMG:10} 工资低于8000的人都来看看吧 精彩图文 {IMG:11} 画风变得太快，接受不来 {IMG:12} 夭寿啦！哆啦A梦居然给大熊戴绿帽子啦！ {IMG:13} 火车上遇到一美女，约我在卫生间…… {IMG:14} 短裙配美腿这舞姿简直太性感了！ {IMG:15} 新兵妹子接受电击训练，最疼的竟然是他... 相关推荐 {IMG:16} 男生有这些表现，就是想睡你宇宙中十大最恐怖黑洞恐怖的中子星中子星撞地球会怎样中子星最大半径黑洞里的物质能出来么宇宙有一种中子星中子星75年后毁灭地球揭秘天鹅座黑洞之谜中子星和夸克星中子星结局是爆炸嘛如果中子星靠近地球中子星投资有限公司官网 '\n",
      "  '中子星投资有限公司']]\n",
      "==========================\n",
      "('4e7aae17', '宜信尊享', Counter({2: 7, 0: 2}), array(['宜信尊享'], dtype=object))\n",
      "[['投诉宜信尊享贷款机构'\n",
      "  '2019年7月27日，我在宜信尊享申请了贷款5000元分12期还款，有专门的宜信客服专员微信联系，跟我说要开会员才可以提现，我就花598元开了中级会员，也提现不了，然后客服专员跟我说由于开会的时候他不在所以不知道中级会员已满员，需要开高级才行，我又花1288元开了高级会员，后面由于本人填写错银行卡号导致需要客服专员去银行帮忙更改，需要1000元，我也给了，直到今天2019年7月29日，客服专员跟我说在银行帮我修改卡号，马上放款，五分钟就能到账，又说银行放款部门风控监测到我信誉比较低，要提前还款半年2801元，我也打过去了，打过去的时候我还问后面还会不会要钱，客服专员说 不会要钱了，叫我等待放款就好了，我一直等，等了十分钟左右叫我在转2000元过去，说是放贷经理在帮我排队，要2000元签协议，以上这些钱客服专员说都会返还到我银行卡里，但是说要2000元签协议我就没有给，我投进去的那些钱还能拿回来吗。 此稿由网友上传至华声在线投诉直通车，记者正在进一步调查核实中，未经华声在线许可，严禁转载。'\n",
      "  '宜信尊享']]\n",
      "==========================\n",
      "('4eec1b40', '京东白条', Counter({0: 6, 2: 3}), array(['京东白条'], dtype=object))\n",
      "[['话题:京东白条将上征信报告 不及时还款后果很严重！' '话题:京东白条将上征信报告 不及时还款后果很严重！' '京东白条']]\n",
      "==========================\n",
      "('50724fd7', '上海联璧电子科技（集团）有限公司', Counter({2: 7, 1: 2}), array(['上海联璧电子科技（集团）有限公司'], dtype=object))\n",
      "[[nan\n",
      "  '【联璧科技涉嫌非法吸储被调查警方带走多名工作人员与数箱物品】6月21日，《每日经济新闻》记者获悉，上海联璧电子科技（集团）有限公司（联璧金融运营主体公司）已被公安部门立案侦查，立案缘由是涉嫌非法吸收公众存款'\n",
      "  '上海联璧电子科技（集团）有限公司']]\n",
      "==========================\n",
      "('520c9118', 'ppmoney', Counter({2: 8, 0: 1}), array(['ppmoney'], dtype=object))\n",
      "[[nan 'ppmoney系统更改导致原先绑定的卡无法提现' 'ppmoney']]\n",
      "==========================\n",
      "('524cad00', '搜狐财经', Counter({0: 7, 1: 2}), array(['搜狐财经'], dtype=object))\n",
      "[[nan '调查丨记者发现9家房企涉嫌违规销售房产局称会依法查处_搜狐财经_搜狐网' '搜狐财经']]\n",
      "==========================\n",
      "('530f7351', '甜菜金融', Counter({0: 7, 2: 2}), array(['甜菜金融'], dtype=object))\n",
      "[['投资者提问：董秘您好！昨天您回复说天风证券已经于2017年将甜菜金融股权出...'\n",
      "  '投资者提问： 董秘您好！昨天您回复说天风证券已经于2017年将甜菜金融股权出售，为何2018年能仍通过天风证券营业部购买到推荐的甜菜金融产品？（可以随意致电甜菜金融投资者）而且截止2019年5月27日，为何通过天风证券移动办公系统经纪家在天风证券子公司目录里仍能查到天风天财（天丰天财原名）？请回复！已切图留念！ 董秘回答(天风证券sh601162)： 尊敬的投资者，您好！甜菜金融的产品仅能通过其自身平台购买。公司子公司的情况请以工商系统查询及公司在指定信息披露媒体发布的定期公告为准。感谢您对天风证券的关注。 查看更多董秘问答>> 文章关键词： 天风证券 甜菜 董秘 我要反馈 {IMG:1} {IMG:2} {IMG:3}'\n",
      "  '甜菜金融']]\n",
      "==========================\n",
      "('5f940d2c', '香港联兴金融控股集团有限公司', Counter({2: 5, 1: 4}), array(['香港联兴金融控股集团有限公司'], dtype=object))\n",
      "[[nan\n",
      "  '（http://gansu.gansudaily.com.cn/system/2014/10/24/015227666.shtml）再如上海市浦东新区法院审理的陈某某等人合同诈骗案，法院查明：被告人陈XX负责注册成立了香港联兴金融控股集团有限公司（以下简称联业金融公司），陈XX任董事长，被告人叶XX负责让他人开设公司网站，网站为www.lxgold.com'\n",
      "  '香港联兴金融控股集团有限公司']]\n",
      "==========================\n",
      "('647b7e3a', '平安保险', Counter({1: 5, 0: 2, 2: 2}), array(['平安保险'], dtype=object))\n",
      "[['今天上午来4s店提车，买的平安保险期限7月31号生效，业务员弄成8月31号了，然后我交了车款，没法提车能退保吗？我还着急提车回家呢@中国平安  ??@中国平安 @人民日报 @央视财经 上午10点多到现在业务员还没有给我解决方案，还在店里等着。平安想做百年企业，服务还不到位。 ?'\n",
      "  '今天上午来4s店提车，买的平安保险期限7月31号生效，业务员弄成8月31号了，然后我交了车款，没法提车能退保吗？我还着急提车回家呢@中国平安  ??@中国平安 @人民日报 @央视财经 上午10点多到现在业务员还没有给我解决方案，还在店里等着。平安想做百年企业，服务还不到位。 ?'\n",
      "  '平安保险']]\n",
      "==========================\n",
      "('6b680212', '链链金融', Counter({0: 8, 2: 1}), array(['链链金融'], dtype=object))\n",
      "[['链家旗下，婶婶，家理财更名为链链金融 北京 · 上地 ?' '链家旗下，婶婶，家理财更名为链链金融 北京 · 上地 ?' '链链金融']]\n",
      "==========================\n",
      "('729dda8a', '融360', Counter({0: 4, 2: 4, 1: 1}), array(['融360'], dtype=object))\n",
      "[[nan\n",
      "  '融360分析师对新快报记者表示,非法吸储只要金额达到100万元以上、投资者损失金额在50万元以上、报案人数在150人以上就可以达到立案标准,而且达到任何其一即可'\n",
      "  '融360']]\n",
      "==========================\n",
      "('730d0a5b', '鼎霖投资', Counter({0: 7, 2: 2}), array(['鼎霖投资'], dtype=object))\n",
      "[[nan '（上）2018-05-2309:22来源:鼎霖投资私募/基金原标题：区分正规私募还是非法集资，看这七点就够了' '鼎霖投资']]\n",
      "==========================\n",
      "('74886f8d', '链链金融', Counter({0: 7, 2: 2}), array(['链链金融'], dtype=object))\n",
      "[['网贷之家发布多项评级榜单，链链金融位列合规积分榜第六'\n",
      "  '榜单从多个维度对网贷平台进行评分，涵盖指标全面，具有较高的行业权威性。能在两项评级中同时入榜，也彰显出链链在积极拥抱监管、合规运营方面的努力和坚持。 1 链链金融积极拥抱监管，建立合规自查系统，恪守行业底线，自觉遵守各项规章制度，确保平台合规运营稳健发展。 银行存管系统 链链金融主动契合监管准则，上线新网银行存管系统，平台资金路径清晰可见，实现用户资金与平台的完全隔离; 严格风控管理 链链金融在贷前、贷中、贷后每个阶段进行全流程跟进，清晰掌握借款人的贷款需求、资质情况、资金去向和回款路径，及时获取项目最新情况及借款人信用变化，提前进行风险控制; 坚持小额分散出借 链链金融积极响应监管要求，坚持小额分散出借原则，每一笔出借资金平均流向上百个借款人，有效降低项目风险; 完善信批制度 《信批指引》发布后，链链金融主动跟进，及时准确批露平台信息和运营数据，从根源消除信息不对称，做到透明化、合规化运营。 2 链链金融严格遵从监管要求，坚持行业自律发展。从成立起，链链金融就树立起对网贷行业正确的态度，认真夯实基础，在面临行业压力时，依然能保持稳健的发展步伐，并在风控建设、信息批露、平台规范发展等方面，取得了一定的成果。 未来链链金融将通过不断努力，持续合规运营，促进行业健康发展，为广大出借人提供更多优质、可靠的金融产品和服务。 中国财经新闻网 Angel 张倩'\n",
      "  '链链金融']]\n",
      "==========================\n",
      "('763c0846', '优财网', Counter({1: 7, 2: 1, 0: 1}), array(['优财网'], dtype=object))\n",
      "[['优财网：刘士余缘何自首：南京银行债市一姐戴娟案或为导火线'\n",
      "  '感谢您对 优财网 的支持 {IMG:1} \\u3000声明：优财网所提供的信息仅供参考！若有疑议请发送邮件到lnddygya3@tom.com，我们将在2个工作日内审核处理。 ?? {IMG:2} 我要评论 已有[ 0]人参与 \\u3000验证码： {IMG:3} {IMG:4} 上一条：优财网：怎样稳增长下一条：优财网：山西：力推国资国企改革进入全国第'\n",
      "  '优财网']]\n",
      "==========================\n",
      "('787355ad', '农业众筹', Counter({0: 6, 1: 3}), array(['农业众筹'], dtype=object))\n",
      "[[nan '【农业众筹的模式有非法集资的风险么' '农业众筹']]\n",
      "==========================\n",
      "('7d064330', '钱宝网', Counter({2: 7, 0: 2}), array(['钱宝网'], dtype=object))\n",
      "[['最佳员工：钱宝网老板已自首 员工还在努力发推广'\n",
      "  ' {IMG:1} \\u3000\\u3000老板已自首，员工还在努力发微博……#年度最佳员工#大家学习一下，顺便别再往里面投钱了…… {IMG:2} 责任编辑：张玉 关键字 : 员工钱宝网推广 我要反馈'\n",
      "  '钱宝网']]\n",
      "==========================\n",
      "('7ea08f95', '掌中财富', Counter({0: 4, 2: 3, 1: 2}), array(['掌中财富'], dtype=object))\n",
      "[[nan\n",
      "  '掌中财富首席执行官吴坚宏指出，在这轮全行业的压力测试中，最先被淘汰的是涉嫌诈骗、自融的庞氏平台;其次是以大额超限标的为主的平台，因为风险集中爆发而难以为继;再者是存在资金池期限错配的平台，因为流动性困境而被迫出清;既而是资产质量不佳，风控能力不强的平台，由于资产状况恶化，逾期风险加剧;最后则是平台整体运营管理能力弱的平台，缺乏长期存续和盈利能力，最终在竞争中被淘汰'\n",
      "  '掌中财富']]\n",
      "==========================\n",
      "('7ff92999', '红岭创投', Counter({0: 8, 2: 1}), array(['红岭创投'], dtype=object))\n",
      "[['红岭创投正测算每年可收回款项 拟开设兑付款专用账户-天眼原创-网贷天眼'\n",
      "  ' 网贷天眼讯 4月15日，红岭创投再次发布公告称，公司正在对每年可收回借款人的回款进行认真、细致地测算；兑付安排需要出借人进行表决，相关表决程序、规则正在协商制定中。 ? 平台在公告中指出，近期平台召开与出借人代表见面会议，充分征求了出借人的不同意见，为充分照顾大多数出借人的利益，真正能够按兑付计划按时兑付出借款项，平台近期正全力以赴推动以下工作： ? 1、充分征求不同出借人群体的诉求； ? 2、公司正在对每年可收回借款人的回款进行认真、细致地测算； ? 3、技术部门正紧锣密鼓地对兑付安排的技术开发难度进行预估和测试，确保兑付安排能够准确及时不出差错； ? 4、兑付安排需要出借人进行表决，相关表决程序、规则正在协商制定中； ? 5、制定出借人监督委员会的设立和选举办法，以便兑付安排进行审议； ? 6、拟于4月15日在银行开设兑付款专用账户，积极落实银行账户的第三方中介监督机构，此过程预计需要一周时间。 ? 平台称兑付安排的初步意见征求稿需对以上内容进行安排和完善，公司正全力以赴加紧开展上述工作，最新进展将及时发布。 ? '\n",
      "  '红岭创投']]\n",
      "==========================\n",
      "('801fa398', '微天下', Counter({1: 8, 0: 1}), array(['微天下'], dtype=object))\n",
      "[[nan\n",
      "  '@千黛健康养生#反四风树新风#@陈里?@微天下?@澎湃新闻?政府部门发证民众相信了一分；官方媒体宣传，民众又相信一分；政府官员站台，民众又相信一分；法院判决未查证属于非法集资，民众又相信一分'\n",
      "  '微天下']]\n",
      "==========================\n",
      "('83aa4694', '上海联璧电子科技（集团）有限公司', Counter({2: 8, 1: 1}), array(['上海联璧电子科技（集团）有限公司'], dtype=object))\n",
      "[[nan\n",
      "  '联璧科技涉嫌非法吸储被调查警方带走多名工作人员-6月21日，《每日经济新闻》记者获悉，上海联璧电子科技（集团）有限公司（联璧金融运营主体公司）已被公安部门立案侦查，立案缘由是涉嫌非法吸收公众存款'\n",
      "  '上海联璧电子科技（集团）有限公司']]\n",
      "==========================\n",
      "('8675ad50', '钛媒体', Counter({1: 8, 0: 1}), array(['钛媒体'], dtype=object))\n",
      "[[nan '【钛媒体综合】6月13日，以狙击中概股闻名的浑水创始人CarsonBlock称，好未来欺诈性地创造利润，他在做空中国的好未来'\n",
      "  '钛媒体']]\n",
      "==========================\n",
      "('871b7228', '融360', Counter({2: 6, 1: 2, 0: 1}), array(['融360'], dtype=object))\n",
      "[[nan\n",
      "  '\\u3000\\u3000\\u3000\\u3000数据来源：融360\\u3000\\u3000\\u3000\\u3000数据来源：融360\\u3000\\u3000融360大数据研究院统计数据显示，43家上线银行存管的问题平台，其中25家提现困难、10家停业、5家跑路、2家经侦介入,1家良性退出'\n",
      "  '融360']]\n",
      "==========================\n",
      "('9220896a', '链链金融', Counter({0: 8, 2: 1}), array(['链链金融'], dtype=object))\n",
      "[['网贷之家发布多项评级榜单，链链金融位网贷之家发布多项评级榜单，链链金融位'\n",
      "  '榜单从多个维度对网贷平台进行评分，涵盖指标全面，具有较高的行业权威性。能在两项评级中同时入榜，也彰显出链链在积极拥抱监管、合规运营方面的努力和坚持。 1 链链金融积极拥抱监管，建立合规自查系统，恪守行业底线，自觉遵守各项规章制度，确保平台合规运营稳健发展。 银行存管系统 链链金融主动契合监管准则，上线新网银行存管系统，平台资金路径清晰可见，实现用户资金与平台的完全隔离; 严格风控管理 链链金融在贷前、贷中、贷后每个阶段进行全流程跟进，清晰掌握借款人的贷款需求、资质情况、资金去向和回款路径，及时获取项目最新情况及借款人信用变化，提前进行风险控制; 坚持小额分散出借 链链金融积极响应监管要求，坚持小额分散出借原则，每一笔出借资金平均流向上百个借款人，有效降低项目风险; 完善信批制度 《信批指引》发布后，链链金融主动跟进，及时准确批露平台信息和运营数据，从根源消除信息不对称，做到透明化、合规化运营。 2 链链金融严格遵从监管要求，坚持行业自律发展。从成立起，链链金融就树立起对网贷行业正确的态度，认真夯实基础，在面临行业压力时，依然能保持稳健的发展步伐，并在风控建设、信息批露、平台规范发展等方面，取得了一定的成果。 未来链链金融将通过不断努力，持续合规运营，促进行业健康发展，为广大出借人提供更多优质、可靠的金融产品和服务。'\n",
      "  '链链金融']]\n",
      "==========================\n",
      "('94b85e30', '币安', Counter({2: 6, 0: 3}), array(['币安'], dtype=object))\n",
      "[[nan\n",
      "  '我就是做法币交易平台，拿到了国家认可的执照，有银行的信托账号，一个普通人的钱存到交易平台时，其实是存到银行信托账号，不是币安私人的账号'\n",
      "  '币安']]\n",
      "==========================\n",
      "('95ce999b', 'ZJLT项目', Counter({1: 7, 2: 2}), array(['ZJLT项目'], dtype=object))\n",
      "[[nan '朱潘等ZJLT项目方被用户指责利用知名人士为虚假项目站台、无底线喊单不兑现承诺、挪用私募币投资、教唆他人恐吓维权者'\n",
      "  'ZJLT项目']]\n",
      "==========================\n",
      "('97068a46', 'PPmoney', Counter({1: 6, 0: 3}), array(['PPmoney'], dtype=object))\n",
      "[['平台测评|PPmoney:体量巨大，问题巨多的平台你敢投吗？'\n",
      "  ' 声明: 本文由( admin )原创编译，转载请保留链接: 平台测评|PPmoney:体量巨大，问题巨多的平台你敢投吗？'\n",
      "  'PPmoney']]\n",
      "==========================\n",
      "('9ba042a8', 'plustoken', Counter({2: 5, 0: 4}), array(['plustoken'], dtype=object))\n",
      "[[nan\n",
      "  '其最大风险是跑路，因为其智能狗模式意味着把自己的加密资产财富控制权交给plustoken，但你如果需要通过智能狗获利，并且希望发展自己的团队，这个又是必选，这是plustoken第一发展阶段的核心价值，也是众多模仿者抄袭的地方，智能狗对于plustoken就类似于qq、微信对于腾讯游戏的重要性，没有qq、微信这样的社交流量口，腾讯游戏不可能只手遮天'\n",
      "  'plustoken']]\n",
      "==========================\n",
      "('9c04a1a6', '汇付信息', Counter({0: 8, 2: 1}), array(['汇付信息'], dtype=object))\n",
      "[[nan\n",
      "  '????汇付信息华东业务总监刘德多表示，有些管理人迫于时间压力，会做一些虚假项目，比如股权类会去投一个空壳公司，几个月后清退，这些项目有的是虚假的，有的达不到投资标准，就是为了保壳去投，这是跟监管精神相违背的'\n",
      "  '汇付信息']]\n",
      "==========================\n",
      "('9c2a3e9c', '融360', Counter({0: 6, 2: 2, 1: 1}), array(['融360'], dtype=object))\n",
      "[[nan '??对于郭树清这句话的理解，融360大数据研究院主编殷燕敏认为，这句话并非网络曲解的，超过10%的理财产品就肯定会血本无归'\n",
      "  '融360']]\n",
      "==========================\n",
      "('9e492e85', '融360', Counter({0: 7, 2: 2}), array(['融360'], dtype=object))\n",
      "[[nan\n",
      "  '【政策逐步完善，网贷行业市场出清仍将继续】融360网贷数据显示，二季度网贷行业共计263家P2P平台出现问题，其中平台失联119家，提现困难93家'\n",
      "  '融360']]\n",
      "==========================\n",
      "('a1d481df', '宜信', Counter({2: 8, 0: 1}), array(['宜信'], dtype=object))\n",
      "[['宜信私募维权风波最新：已报中基协核查处理'\n",
      "  ' {IMG:1} 第57期：互联网汽车金融 《数说网贷》第57期：互联网汽车金融。贷款金额一般都在20万元以内的汽车消费贷款和汽车抵押贷款，将成为众多的P2P网贷平台业务转型的首选，未来互联网车贷市场的竞争，将更加激烈。... 查看全文'\n",
      "  '宜信']]\n",
      "==========================\n",
      "('a408f408', 'nan', Counter({0: 7, 1: 2}), array([nan], dtype=object))\n",
      "[['值得看——【此次中央政治局会议关于货币政策的表述未有太大亮点，下半年或主要在财政政策方面加力提效。其中需要重点关注的地方是中央政治局会议表明要落实房地产长效管理机制，不将房地产作为短期刺激经济的手段。而地产作为传统部门中信贷传导最重要的锚，它的受限或将在很大程度上影响 ...全文：  ?'\n",
      "  '值得看——【此次中央政治局会议关于货币政策的表述未有太大亮点，下半年或主要在财政政策方面加力提效。其中需要重点关注的地方是中央政治局会议表明要落实房地产长效管理机制，不将房地产作为短期刺激经济的手段。而地产作为传统部门中信贷传导最重要的锚，它的受限或将在很大程度上影响 ...全文： http://m.weibo.cn/1346463104/4400137775980772 ?'\n",
      "  nan]]\n",
      "==========================\n",
      "('a70c6a9d', '红岭创投', Counter({0: 6, 2: 3}), array(['红岭创投'], dtype=object))\n",
      "[['最新！红岭创投4月25日前进行兑付 现有资金1亿元-天眼原创-网贷天眼'\n",
      "  ' 网贷天眼讯 4月16日消息，红岭创投创始人周世平在红岭社区发布《最新进展汇总》称，平台现金账户余额均在1亿元左右，初步考虑本月25日之前开始兑付，本月只兑付两次，以后每月兑付四次，保障各位投资者基本生活保障。此外平台以红岭创投和投资宝为主，不包含亿钱贷。 ? 周世平称，近期兑付方案已经多轮讨论，因为是平台自主退出，而且历史遗留问题比较多，为了降低退出过程中的不确定风险，借鉴了深圳市良性退出文件中的部分内容，同时在降低人数及小额投资者方面做了一些特别考虑，应该说方案已经趋于成熟，现正在测试退出过程中，每个季度不良资产清收所提供的现金流，确保兑付方案的顺利进行。 ? 目前，红岭创投已将兑付款专用账户开好，并拟了出借人监督委员会选举办法，月底将正常兑付。 ? 此外，关于平台不良资产清收问题，周世平表示，长城资金预计将会有3.5亿元债权回购；山东某上市公司已经按约定完成手续，6月底之前按计划还款7100万。其余正在洽谈中。 ? 周世平表示，红岭创投清收委员会名单初定本周正式开始工作，磨合两周再确认正式名单；并承诺平台将不余遗力保障投资人的资金安全。 ? 以下为周世平在红岭社区发布《最新进展汇总》原文： ? {IMG:1} ? ? '\n",
      "  '红岭创投']]\n",
      "==========================\n",
      "('ab2ec010', '融360', Counter({1: 8, 0: 1}), array(['融360'], dtype=object))\n",
      "[[nan\n",
      "  '?整改有待提速?融360统计数据显示，截至2018年4月23日，全国共有832家正常运营的网贷平台上线银行资金存管系统，占融360监测范围内正常运营平台的53.16%；累计36家银行存管平台爆雷，涉及问题主要包括网贷平台跑路、提现困难、经侦介入等'\n",
      "  '融360']]\n",
      "==========================\n",
      "('b0f9ca14', '大宗商品交易', Counter({2: 8, 1: 1}), array(['大宗商品交易'], dtype=object))\n",
      "[[nan '（5）以大宗商品交易市场进行非法集资' '大宗商品交易']]\n",
      "==========================\n",
      "('b7ff7b2a', '支付宝', Counter({0: 8, 1: 1}), array(['支付宝'], dtype=object))\n",
      "[['投融快讯 | 网易投资法国游戏工作室Quantic Dream 美国“支付宝”Stripe再获1亿美元融资'\n",
      "  ' 【1月30日投融快讯】美国支付创企Stripe近期获得了Tiger Global Management 1亿美元的投资，本轮融资使该公司的估值超过了220亿美元。 本轮融资是Tiger Global去年领投的2.45亿美元融资轮次的后续行动，该公司去年的估值为200亿美元。该知情人士表示，这笔资金将用于增长领域，包括国际扩张。 1月29日，网易正式宣布一项新的投资，法国工作室Quantic Dream，而就在这个月初，前《炉石传说》游戏总监Ben Brode创办的Second Dinner宣布获得了网易的投资，目前网易对于游戏产业的投资正在逐渐步向海外。 Quantic Dream作为PS平台最优秀厂商之一，除了开发自己的游戏外，他们还为电影和其它游戏公司提供动作捕捉服务。 2005年，Quantic Dream的第二款产品《幻象杀手》上线，这款产品获得多个PS2和XBox年度游戏大奖，全平台年度最佳冒险游戏大奖等。 {IMG:1} （本文为艾瑞网独家原创稿件 转载请注明出处）'\n",
      "  '支付宝']]\n",
      "==========================\n",
      "('bf14b995', '创青春', Counter({0: 8, 1: 1}), array(['创青春'], dtype=object))\n",
      "[[nan\n",
      "  ';▲刘德林（中）和公司员工商讨文创产品的设计和构图从小就对中国传统文化有着执著的追求的刘德林在2016年7月8日，也就是他22岁的时候投资十几万元创建了陕西呦呦鹿鸣创意文化传播有限公司经过两年多时间的努力公司已成为行业内的佼佼者他本人也被聘为陕西青年企业家协会会员陕西文创产业发展联盟理事会成员全国大学生三创赛创业导师等在今年9月的第六届创青春陕西青年创新创业大赛中刘德林带领的团队获得了省级总决赛的二等奖▲第六届创青春省级总决赛颁奖现场-刘德林（左一）并在10月份代表陕西参加的苏高新杯第五届创青春中国青年创新创业大赛全国赛中获得了优胜奖全国赛现场-刘德林他这么年轻，就这么厉害家里肯定不简单吧相信会有很多青都会这样想这世上绝没有一蹴而就的成功那些厉害的人，一直很努力其实这个文创公司已经是刘德林的第三次创业了一年时间10万投资血本无归上大一那年，刘德林向父母借款10万元，利用假期和课余时间，在西安高新区花4万元租了一间门面房，2万元购买了一台牛奶加工机，雇了名大学生，开了一间鲜奶吧，三个人三班倒，从泾阳奶农的供奶站进货，每天生产140斤巴氏奶，零售加订单，销路不错，但成本太大入不敷出，运营不到一年时间，10万元投资血本无归，最后把机器卖了7000元'\n",
      "  '创青春']]\n",
      "==========================\n",
      "('c73d1272', 'ic', Counter({1: 8, 0: 1}), array(['ic'], dtype=object))\n",
      "[[nan\n",
      "  '（东方ic/图）自2015年至今，上海、常州、嘉兴等地公安机关侦办、披露的文物鉴定、拍卖诈骗案件已达34起，批捕或控制犯罪嫌疑人412人，已核实受害者五千余人'\n",
      "  'ic']]\n",
      "==========================\n",
      "('d1ed60e1', '班汇通', Counter({0: 5, 2: 4}), array(['班汇通'], dtype=object))\n",
      "[[nan '\\u3000\\u30007、在活动期间若出现恶意欺诈行为，一经发现班汇通平台有权取消其活动参与资格；' '班汇通']]\n",
      "==========================\n",
      "('e82bf011', '海象理财', Counter({2: 8, 1: 1}), array(['海象理财'], dtype=object))\n",
      "[['投资海象理财很久了，为老伙计证言一下'\n",
      "  ' 花开了 花开了 当前离线 金融秀才 {IMG:1} {IMG:2} 金融秀才, 积分 557, 距离下一级还需 443 积分 {IMG:3} {IMG:4} 金融秀才, 积分 557, 距离下一级还需 443 积分注册时间2015-8-5最后登录1970-1-1主题帖子诚信新币积分557 {IMG:5} {IMG:6} 电梯直达 {IMG:7} 楼主 {IMG:8} 发表于 昨天?22:44 |只看该作者  {IMG:9} |倒序浏览 |阅读模式 投资时间不长不短，听说p2p大概是08年吧，有段时间没有尝试，经过朋友介绍开始尝试投资，刚开始尝试大平台，后来朋友介绍的海象理财，朋友说看中海象就是低调朴实，经常举办线下活动也有很多小惊喜，一直很平稳的3年，自己很信任。现在经济大环境不好，海象也受到牵连，其实内心是理解的，我股票亏了50个点，我自己有信心等2年还会涨回来的，对待海象还是一个态度。我相信海象能度过这次难关，一点点兑付不要紧，毕竟现在能有兜底决心的企业没有了，相信海象，我愿意为他证言。 '\n",
      "  '海象理财']]\n",
      "==========================\n",
      "('ea045be1', '银桥网', Counter({2: 6, 0: 2, 1: 1}), array(['银桥网'], dtype=object))\n",
      "[[nan\n",
      "  '银桥网分析，本次网贷平台爆雷潮的主因在于，一是一些主打自融、虚假标的、资金池等庞氏骗局的平台在监管趋严下难以为继；二是流动性趋紧导致贷款端（尤其大额）逾期率上升、平台累计的准备金难以足额赔付；三是投资者资金流入放缓，导致存在期限错配的平台流动性问题凸显'\n",
      "  '银桥网']]\n",
      "==========================\n",
      "('ef2258cd', '百姓网', Counter({2: 8, 1: 1}), array(['百姓网'], dtype=object))\n",
      "[[nan\n",
      "  '情系百姓网,苏银霞非法吸储案庭审细节披露：从受害者成加害方-情系百姓网运用新闻传媒的手段，通过对老百姓普遍关心的社会时事剖析，扬善弃恶明达'\n",
      "  '百姓网']]\n",
      "==========================\n",
      "('f143aca4', '宜贷网', Counter({2: 7, 0: 2}), array(['宜贷网'], dtype=object))\n",
      "[['宜贷网发布公告称平台良性退出受阻 求司法审计协助'\n",
      "  ' \\u3000\\u3000财联社1月7日讯，日前宣布清盘的宜贷网近日再度发布公告称，平台《宜贷网良性退出征求意见稿》发出之后，出借人情绪激动、流言不断，认为平台套路清盘、自融套钱的不在少数，平台后续良性退出工作无法正常开展。在清退过程中，宜贷网亟需相关政府部门的帮助，恳请相关监管部门伸出援手，介入宜贷网良性退出工。此外，宜贷网还发布《给宜贷网平台广大出借人和有关部门的说明》中表示，目前债权能足额覆盖未偿还本金，未偿还收益也能在未来的催收及司法诉讼中能全部偿还，将保障所有出借人收到本息，所有出借人收回本金之后，再支付利息、未来的收益。（财联社记者 姜樊） 责任编辑：赵子牛 我要反馈 {IMG:1} {IMG:2} {IMG:3} '\n",
      "  '宜贷网']]\n",
      "==========================\n",
      "('fc17c182', '海象理财', Counter({2: 7, 1: 2}), array(['海象理财'], dtype=object))\n",
      "[['因为海象坑钱的事 这一年心情巨暴躁 时不时半夜三更情绪崩溃 没和家里说 一个人默默承受 唯一知道的就是当初推荐我海象理财的亲戚 他暴雷前全部取出来了 我感叹你运气真好 他说他朋友和他说不安全有隐患建议他取出来 他就取出来了顺利避雷 我说那你不顺便和我也说下 你推荐我的海象 你也知道我用海象 ...全文：  ?'\n",
      "  '因为海象坑钱的事 这一年心情巨暴躁 时不时半夜三更情绪崩溃 没和家里说 一个人默默承受 唯一知道的就是当初推荐我海象理财的亲戚 他暴雷前全部取出来了 我感叹你运气真好 他说他朋友和他说不安全有隐患建议他取出来 他就取出来了顺利避雷 我说那你不顺便和我也说下 你推荐我的海象 你也知道我用海象 ...全文： http://m.weibo.cn/5330900845/4392121148160059 ?'\n",
      "  '海象理财']]\n",
      "==========================\n",
      "('ff689d8a', '宜贷网', Counter({0: 7, 2: 2}), array(['宜贷网'], dtype=object))\n",
      "[['宜贷网向广大出借人和有关部门发布说明'\n",
      "  ' 宜贷网向广大出借人和有关部门发布说明 快讯 零壹财经 零壹财经 2019-01-07 出借人 宜贷网 1月6日，宜贷网发布《给宜贷网平台广大出借人和有关部门的说明》。说明称，出借人中有88%比例的出借人本金尚未收回。平台和管理团队有信心在未来3-5年内催回投友的未偿还本金，避免监管政策风险，并将为之不懈努力，团队无退路。 0 '\n",
      "  '宜贷网']]\n",
      "==========================\n"
     ]
    }
   ],
   "source": [
    "# Dump every remaining hesitant sample next to the title/text/entity of its document\n",
    "# for manual inspection.\n",
    "for sample in hesitate_long_rmed:\n",
    "    print(sample)\n",
    "    doc_rows = test_df[test_df['id'] == sample[0]]\n",
    "    print(doc_rows[['title','text','entity']].values)\n",
    "    print('============='*2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ensemble by majority vote: for every (id, entity) group keep the label predicted most often.\n",
    "# most_common(1) resolves ties in favour of the label encountered first.\n",
    "rs_list = [\n",
    "    (doc_id, key, Counter(runs['predict_labels']).most_common(1)[0][0])\n",
    "    for (doc_id, key), runs in group\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn the voted (id, entity, label) triples into a frame with columns ordered id, predict_label, key_entity.\n",
    "# Note: this rebinds `raw_df`, replacing the single-run frame loaded near the top of the notebook.\n",
    "raw_df = pd.DataFrame(rs_list,columns=['id','key_entity','predict_label'])[['id','predict_label','key_entity']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "# raw_df['key_entity'] = entity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather, per document id, the voted labels and the matching entities as two parallel lists:\n",
    "#   rs_map[id] -> ([label, ...], [entity, ...])\n",
    "# The loop variables are deliberately not called `id` / `entity`: those names would shadow the\n",
    "# builtin and overwrite the notebook-level `entity` list that the earlier key_entity cell relies on.\n",
    "rs_map = {}\n",
    "for doc_id, label, key in raw_df.values:\n",
    "    doc_labels, doc_entities = rs_map.setdefault(doc_id, ([], []))\n",
    "    doc_labels.append(label)\n",
    "    doc_entities.append(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([1, 1, 1], ['大学生贷', '花呗', '蚂蚁金服'])"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot check: voted labels and entities of the document inspected in the hesitation cell above.\n",
    "rs_map['e9abc6b0']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one (id, senti, key_entity) row per document:\n",
    "#   senti      - 1 when the mean of the document's voted labels is >= 1, else 0\n",
    "#   key_entity - the entities voted label 2, joined with ';'; NaN when there are none or senti is 0\n",
    "items = []\n",
    "for doc_id, (labels, entities) in rs_map.items():\n",
    "    senti = int(np.mean(labels) >= 1)\n",
    "    keys = [name for label, name in zip(labels, entities) if label == 2]\n",
    "    joined = ';'.join(keys)\n",
    "    key_entity = joined if (keys and senti != 0) else np.nan\n",
    "    items.append((doc_id, senti, key_entity))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>negative</th>\n",
       "      <th>key_entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>00049297</td>\n",
       "      <td>1</td>\n",
       "      <td>小资钱包;资易贷;资易贷金融信息服务有限公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>000b8b75</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0012d20a</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0033ebe3</td>\n",
       "      <td>1</td>\n",
       "      <td>联璧金融</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>003b1540</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4995</td>\n",
       "      <td>ffa46c98</td>\n",
       "      <td>1</td>\n",
       "      <td>小资钱包;资易贷</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4996</td>\n",
       "      <td>ffc0005d</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4997</td>\n",
       "      <td>ffd1497a</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4998</td>\n",
       "      <td>fff09e68</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4999</td>\n",
       "      <td>fffe28dd</td>\n",
       "      <td>1</td>\n",
       "      <td>黑火金融</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5000 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id  negative              key_entity\n",
       "0     00049297         1  小资钱包;资易贷;资易贷金融信息服务有限公司\n",
       "1     000b8b75         0                     NaN\n",
       "2     0012d20a         0                     NaN\n",
       "3     0033ebe3         1                    联璧金融\n",
       "4     003b1540         0                     NaN\n",
       "...        ...       ...                     ...\n",
       "4995  ffa46c98         1                小资钱包;资易贷\n",
       "4996  ffc0005d         0                     NaN\n",
       "4997  ffd1497a         0                     NaN\n",
       "4998  fff09e68         0                     NaN\n",
       "4999  fffe28dd         1                    黑火金融\n",
       "\n",
       "[5000 rows x 3 columns]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the per-document prediction table from the (id, negative, key_entity) tuples.\n",
    "# Note: this rebinds raw_df, which earlier held a load_model_rs(...) result.\n",
    "raw_df = pd.DataFrame(items,columns=['id','negative','key_entity'])\n",
    "raw_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# De-duplicate: drop the shorter names\n",
    "import numpy as np\n",
    "def remove_short_entity_by_long(entity_str):\n",
    "    \"\"\"\n",
    "    Drop every name that is a strictly shorter substring of another name in\n",
    "    the same ';'-separated key_entity string.\n",
    "\n",
    "    Each (shorter, longer) match is reported on stdout.\n",
    "\n",
    "    :param entity_str: ';'-joined entity names; any non-string value (e.g. NaN)\n",
    "        is returned unchanged.\n",
    "    :return: the surviving names, ';'-joined in their original order.\n",
    "    \"\"\"\n",
    "    if not isinstance(entity_str, str):\n",
    "        return entity_str\n",
    "    candidates = entity_str.split(';')\n",
    "    kept = []\n",
    "    for name in candidates:\n",
    "        covered = False\n",
    "        # Keep scanning after the first hit so every covering name is logged.\n",
    "        for other in candidates:\n",
    "            if len(name) < len(other) and name in other:\n",
    "                print('removed %s by %s'%(name,other))\n",
    "                covered = True\n",
    "        if not covered:\n",
    "            kept.append(name)\n",
    "    return ';'.join(kept)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_trans_map():\n",
    "    \"\"\"\n",
    "    Build an alias-resolution table from the training split.\n",
    "\n",
    "    For every training row, each ordered pair of distinct, non-empty names in\n",
    "    the 'entity' column where one name contains the other is examined. If\n",
    "    either name appears in the row's 'key_entity' column, both 'a-b' and 'b-a'\n",
    "    are mapped to that name; when both appear, the second name of the pair is\n",
    "    written last and therefore wins.\n",
    "\n",
    "    :return: dict mapping 'name1-name2' to the name to keep.\n",
    "    \"\"\"\n",
    "    from data_utils.basic_data import load_basic_dataset\n",
    "\n",
    "    def split_names(cell):\n",
    "        # str() turns missing values into the literal text 'nan' before splitting.\n",
    "        return list(str(cell).split(';'))\n",
    "\n",
    "    train_df = load_basic_dataset('train')\n",
    "    candidate_lists = train_df['entity'].map(split_names)\n",
    "    key_lists = train_df['key_entity'].map(split_names)\n",
    "    trans_map = {}\n",
    "    for candidates, key_names in zip(candidate_lists, key_lists):\n",
    "        for first in candidates:\n",
    "            if first == '':\n",
    "                continue\n",
    "            for second in candidates:\n",
    "                if second == '' or second == first:\n",
    "                    continue\n",
    "                if not (first in second or second in first):\n",
    "                    continue\n",
    "                forward = first + '-' + second\n",
    "                backward = second + '-' + first\n",
    "                # Order matters: 'second' is processed last and overrides 'first'.\n",
    "                for winner in (first, second):\n",
    "                    if winner in key_names:\n",
    "                        trans_map[forward] = winner\n",
    "                        trans_map[backward] = winner\n",
    "    return trans_map\n",
    "def trans_keys(trans_map,entity_str):\n",
    "    \"\"\"\n",
    "    Replace entity names by the name trans_map prefers for each pair.\n",
    "\n",
    "    For every name in the ';'-separated string, each other name in the same\n",
    "    string is looked up as 'name-other' in trans_map; all mapped names found\n",
    "    are kept, and a name with no mapping is kept as is. Results are\n",
    "    de-duplicated in first-seen order so the output is deterministic (joining\n",
    "    a set of strings yields a different order from one interpreter run to the next).\n",
    "\n",
    "    :param trans_map: dict mapping 'name1-name2' to the name to keep.\n",
    "    :param entity_str: ';'-joined entity names; any non-string value (e.g. NaN)\n",
    "        is returned unchanged.\n",
    "    :return: ';'-joined resolved names, or np.nan when no non-blank name exists.\n",
    "    \"\"\"\n",
    "    if not isinstance(entity_str,str):\n",
    "        return entity_str\n",
    "    names = [name for name in entity_str.split(';') if name.strip() != '']\n",
    "    resolved = []\n",
    "    for name in names:\n",
    "        targets = [trans_map[name + '-' + other] for other in names\n",
    "                   if name != other and name + '-' + other in trans_map]\n",
    "        # Fall back to the name itself when no pair involving it is mapped.\n",
    "        for target in (targets or [name]):\n",
    "            if target not in resolved:\n",
    "                resolved.append(target)\n",
    "    if resolved:\n",
    "        return ';'.join(resolved)\n",
    "    return np.nan\n",
    "trans_map = get_trans_map()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on an independent copy so the in-place key_entity edits below do not also mutate raw_df.\n",
    "rs_df = raw_df.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>negative</th>\n",
       "      <th>key_entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>4440</td>\n",
       "      <td>e55a3377</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            id  negative key_entity\n",
       "4440  e55a3377         1        NaN"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "removed 米咖 by 米咖网\n",
      "removed 成都小额贷款 by 成都小额贷款公司\n",
      "removed 随手记 by 随手记钱包\n",
      "removed 钱包 by 随手记钱包\n",
      "removed 海钜信达 by 深圳市海钜信达投资发展有限公司\n",
      "removed 宜贷网 by ?宜贷网\n",
      "removed 天下投 by 深圳市富通天下投资管理有限公司\n",
      "removed 富通天下 by 深圳市富通天下投资管理有限公司\n",
      "removed 深圳市富通 by 深圳市富通天下投资管理有限公司\n",
      "removed 两只老虎 by 两只老虎理财\n",
      "removed 老虎理财 by 两只老虎理财\n",
      "removed 陆金所 by 西部陆金所\n",
      "removed 恒隆小额贷款 by 广州市恒隆小额贷款有限公司\n",
      "removed 小额贷 by 恒隆小额贷款\n",
      "removed 小额贷 by 广州市恒隆小额贷款有限公司\n",
      "removed 小额贷 by 小额贷款有限公司\n",
      "removed 小额贷款有限公司 by 广州市恒隆小额贷款有限公司\n",
      "removed 宜贷网 by 宜贷网（原易贷网）\n",
      "removed 易贷网 by 宜贷网（原易贷网）\n",
      "removed 无忧借条 by ????无忧借条\n",
      "removed 嘉盛 by 嘉盛国际\n",
      "removed 雪橙 by 雪橙金服\n",
      "removed 智融财富 by 深圳市智融财富电商投资有限公司\n",
      "removed 高盛国际 by gs-forex高盛国际\n",
      "removed 山东海倍电子商务 by 山东海倍电子商务股份有限公司\n",
      "removed 国润 by 江西国润\n",
      "removed 随行付支付 by 随行付支付有限公司山西分公司\n",
      "removed 随行付 by 随行付支付\n",
      "removed 随行付 by 随行付支付有限公司山西分公司\n",
      "removed 粤融泰富 by 广东粤融泰富网络信息服务有限公司\n",
      "removed 天合联盟 by 安徽天合联盟科技有限公司\n",
      "removed 天合 by 天合联盟\n",
      "removed 天合 by 安徽天合联盟科技有限公司\n",
      "removed 北京华澳融信 by 北京华澳融信国际投资管理咨询有限公司\n",
      "removed 北京华澳翼时代 by 北京华澳翼时代信息技术有限责任公司\n",
      "removed 渤海创投 by 渤海创投集团通\n",
      "removed 云财富 by 外滩云财富\n",
      "removed 易贷金融 by  (北京)资易贷金融信息服务有限公司\n",
      "removed 易商通 by 北京易商通科技有限公司\n",
      "removed 红太阳 by 湖南红太阳电源新材料股份有限公司\n",
      "removed 深圳光彩 by 深圳光彩投资控股集团有限公司\n",
      "removed 壹佰金融 by 深圳壹佰金融\n",
      "removed 和耕传承基金 by 和耕传承基金销售有限公司控股\n",
      "removed 宜信 by 宜信惠民\n",
      "removed 宜信 by 宜信普惠\n",
      "removed 瑞波 by 瑞波币\n",
      "removed 滴水贷 by ????滴水贷\n",
      "removed 翱晟投资 by 台州翱晟投资公司\n",
      "removed 高新盛 by 深圳高新盛\n",
      "removed 深圳高新 by 深圳高新盛\n",
      "removed 麒麟金融 by 麒麟金融集团有限公司\n",
      "removed 天天投 by 天天投金融\n",
      "removed 麒麟金融 by 麒麟金融集团有限公司\n",
      "removed 华金融 by 高仕华金融\n",
      "removed 汇鑫小额贷款 by 汇鑫小额贷款有限公司\n",
      "removed 小额贷款有限公司 by 汇鑫小额贷款有限公司\n",
      "removed 联璧 by 联璧金融\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 麒麟金融 by 麒麟金融集团有限公司\n",
      "removed 高新盛 by 深圳高新盛创投电子商务有限公司\n",
      "removed 深圳高新 by 深圳高新盛创投电子商务有限公司\n",
      "removed 齐鲁商品 by 齐鲁商品交易中心\n",
      "removed 亚太投资 by 北京亚太投资\n",
      "removed 汇聚财富 by 上海汇聚财富\n",
      "removed 渤海创投 by ?渤海创投子公司智慧蜂巢\n",
      "removed 小额贷 by 小额贷款有限公司\n",
      "removed 小额贷 by 中合华惠农村小额贷款有限公司\n",
      "removed 小额贷款有限公司 by 中合华惠农村小额贷款有限公司\n",
      "removed 御顺金融 by 成都御顺金融贷款公司\n",
      "removed 盛付通 by 上海盛付通电子支付服务有限公司陕西分公司\n",
      "removed 星投资 by 泽星投资\n",
      "removed 亿金融 by 优亿金融\n",
      "removed 恒宇 by 恒宇天泽\n",
      "removed 圣盈信 by 圣盈信CIFS\n",
      "removed 零钱罐 by 零钱罐APP\n",
      "removed 小额贷 by 万源农村小额贷款有限公司与沈秋云\n",
      "removed 小额贷 by 小额贷款有限公司\n",
      "removed 小额贷款有限公司 by 万源农村小额贷款有限公司与沈秋云\n",
      "removed 节节贷 by 广西弘尚节节贷集团(节节资本)\n",
      "removed 节节贷 by 广西弘尚节节贷集团\n",
      "removed 广西弘尚节节贷集团 by 广西弘尚节节贷集团(节节资本)\n",
      "removed 资易贷 by 资易贷北京金融信息服务有限公\n",
      "removed 点融 by 点融网\n",
      "removed 中南大宗 by 中南大宗商品电子商务有限公司\n",
      "removed 智云 by 智云金融\n",
      "removed 易贷金融 by  (北京)资易贷金融信息服务有限公司\n",
      "removed 玖富 by 玖富?投诉量最多\n",
      "removed fx by fx福克斯\n",
      "removed fx by onefx\n",
      "removed 麒麟金融 by 麒麟金融集团有限公司\n",
      "removed 米融 by 易米融\n",
      "removed 中吴财富 by 上海中吴财富投资管理集团有限公司\n",
      "removed 节节贷 by 广西弘尚节节贷网络信息服务集团有限公司\n",
      "removed 兴业财富 by 北方兴业财富\n",
      "removed 贵金属 by 贵金属投资\n",
      "removed 沃德 by 沃德斯国际\n",
      "removed 蜜蜂 by 蜜蜂财富\n",
      "removed 中银消费金融 by 中银消费金融有限公司\n",
      "removed 中银 by 中银消费金融有限公司\n",
      "removed 中银 by 中银消费金融\n",
      "removed 钱宝 by 钱宝财\n",
      "removed 房融 by 房融所\n",
      "removed 随行付支付 by 随行付支付有限公司\n",
      "removed 随行付 by 随行付支付\n",
      "removed 随行付 by 随行付支付有限公司\n",
      "removed 资易贷 by 资易贷(北京）金融信息有限公司公司\n",
      "removed 冠群驰骋 by 冠群驰骋商务信息咨询（天津）有限公司\n",
      "removed 优宝 by 优宝汇\n",
      "removed 青岛九州商品交易中心 by 青岛九州商品交易中心有限公司\n",
      "removed 海贷 by 海贷金服体验金\n",
      "removed 海贷 by 海贷金服\n",
      "removed 海贷金服 by 海贷金服体验金\n",
      "removed 二元期权 by 金盛二元期权\n",
      "removed 钱宝 by 钱宝财\n",
      "removed 世纪贷 by 世纪贷互联网金融服务有限公司\n",
      "removed 海象理财 by 北京海象理财\n",
      "removed 银通投资 by 中润银通投资北京有限公司\n",
      "removed 华夏信财 by 华夏信财信息咨询（上海）有限公司芜湖分公司\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 仁和融兴 by 青岛仁和融兴投资有限公司\n",
      "removed 海象理财 by 北京海象理财\n",
      "removed 商银信支付 by 商银信支付服务有限公司\n",
      "removed 钱海湾 by 钱海湾金融\n",
      "removed 钱海湾 by 钱海湾金融公司\n",
      "removed 钱海湾金融 by 钱海湾金融公司\n",
      "removed 河北滨海大宗商品交易市场 by 河北滨海大宗商品交易市场服务有限公司\n",
      "removed 大宗商品交易 by 河北滨海大宗商品交易市场\n",
      "removed 大宗商品交易 by 河北滨海大宗商品交易市场服务有限公司\n",
      "removed 优库速购 by 深圳优库速购\n",
      "removed igofx by igofx平台\n",
      "removed 时贷 by 大时贷\n",
      "removed 市大时代 by 深圳市大时代资产管理有限公司\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 星投资 by 泽星投资\n",
      "removed 易贷金融 by  (北京)资易贷金融信息服务有限公司\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 信e贷 by 久信e贷\n",
      "removed 中赢投 by 中赢投资\n",
      "removed 玖财 by 玖财通\n",
      "removed 懒财主 by 海懒财主金融信息服务（深圳）有限公司\n",
      "removed 懒财主 by 前海懒财主金融\n",
      "removed 和信 by 资和信\n",
      "removed 宜贷网 by 宜贷网（原易贷网）\n",
      "removed 易贷网 by 宜贷网（原易贷网）\n",
      "removed 米融 by 易米融\n",
      "removed 日融财富 by 宁波日融财富投资管理有限公司\n",
      "removed 蜜蜂 by 蜜蜂财富\n",
      "removed 小资钱包 by 资易贷（小资钱包）\n",
      "removed 资易贷 by 资易贷（小资钱包）\n",
      "removed 小资钱包 by 小资钱包公司(资易贷平台)\n",
      "removed 资易贷 by 小资钱包公司(资易贷平台)\n",
      "removed 以太坊 by 以太坊ETH\n",
      "removed 上海易贷网 by 上海易贷网金融信息服务有限公司\n",
      "removed 上海文化产权交易所 by 上海文化产权交易所股份有限公司\n",
      "removed 中子星投资 by 中子星投资有限公司\n",
      "removed 星投资 by 中子星投资\n",
      "removed 星投资 by 中子星投资有限公司\n",
      "removed 36氪 by 36氪股权众筹\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 联金所 by 联金所康\n",
      "removed 金易融 by 金易融（北京）网络科技有限公司\n",
      "removed 钱宝 by 钱宝财\n",
      "removed 信和大金融 by 传信和大金融\n",
      "removed 宝银创赢 by 上海宝银创赢投资管理有限公司\n",
      "removed 创赢投资 by 上海宝银创赢投资管理有限公司\n",
      "removed 凯福德 by KFD凯福德\n",
      "removed 瑞财 by 恒瑞财富\n",
      "removed 中海投资 by 上海中海投资产管理有限公司\n",
      "removed 中海投 by 中海投资\n",
      "removed 中海投 by 上海中海投资产管理有限公司\n",
      "removed 易贷金融 by （北京）资易贷金融信息服务有限公司\n",
      "removed 唐小僧 by 唐小僧之后\n",
      "removed 北京华澳融信 by 北京华澳融信国际投资管理咨询有限公司\n",
      "removed 米融 by 易米融\n",
      "removed 长征财富 by 长征财富资产管理有限公司宝应支公司\n",
      "removed 老虎金融 by 老虎金融信息服务（北京）有限公司\n",
      "removed 冠群驰骋 by 冠群驰骋投资关联(北京)有限公司\n",
      "removed 冠群驰骋 by 冠群驰骋投资管理(北京)有限公司\n",
      "removed 小资钱包 by  资易贷(小资钱包）\n",
      "removed 小额贷 by 小额贷款有限公司\n",
      "removed 小额贷 by 金久农村小额贷款有限公司\n",
      "removed 小额贷款有限公司 by 金久农村小额贷款有限公司\n",
      "removed 银网贷 by 超银网贷\n",
      "removed tbc by tbc（海湾资本）澳大利亚asic\n",
      "removed 翱晟投资 by 台州翱晟投资公司\n",
      "removed 聚财猫 by ????聚财猫\n",
      "removed 小资钱包 by 北京资易贷公司（小资钱包)\n",
      "removed 资易贷 by 北京资易贷公司（小资钱包)\n",
      "removed 仁和融兴 by 青岛仁和融兴投资有限公司\n",
      "removed 日融财富 by 宁波日融财富投资管理有限公司\n",
      "removed 钱吧 by 弹钱吧\n",
      "removed 冠e通 by 冠e通平台\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 一号家居网 by 一号家居网装饰公司\n",
      "removed 易商通 by 北京易商通科技有限公司\n",
      "removed 麒麟金融 by 麒麟金融集团有限公司\n",
      "removed 山东海倍电子商务 by 山东海倍电子商务股份有限公司\n",
      "removed 简贷 by 易简贷\n",
      "removed 汇金方格 by 汇金方格（北京）投资管理有限公司\n",
      "removed 融通资产 by 北京圆融通资产管理有限公司\n",
      "removed 资易贷 by 海淀资易贷平台\n",
      "removed 惠金服 by 城惠金服\n",
      "removed 小资钱包 by 北京资易贷金融信息服务有限公司（简称小资钱包）\n",
      "removed 乐贷 by 网乐贷\n",
      "removed 易商通 by 北京易商通科技有限公司\n",
      "removed 誉东方投资管理 by 深圳市誉东方投资管理平台\n",
      "removed 东方投 by 深圳市誉东方投资管理平台\n",
      "removed 东方投 by 誉东方投资管理\n",
      "removed 理财帝 by 理财帝国\n",
      "removed 冠群驰骋 by ????冠群驰骋\n",
      "removed 大宗商品交易 by 河北滨海大宗商品交易市场\n",
      "removed 时贷 by 森昊好时贷\n",
      "removed 贷联盟 by 新贷联盟\n",
      "removed 云金融 by 景云金融\n",
      "removed 澳瑞克 by OracleFX澳瑞克\n",
      "removed 新余铭沃 by 新余铭沃投资管理中心\n",
      "removed 赛伯乐绿科 by 深圳赛伯乐绿科投资管理有限公司\n",
      "removed 重庆赛伯乐盈科 by 重庆赛伯乐盈科股权投资基金管理有限公司\n",
      "removed 中吴财富 by 上海中吴财富投资管理集团有限公司\n",
      "removed 资易贷 by 资易贷（北京）金融信息服务公司\n",
      "removed 汇聚财富 by 上海汇聚财富投资\n",
      "removed 联璧 by 联璧金融\n",
      "removed 渤海商品交易 by 天津渤海商品交易所\n",
      "removed 富民投资 by 富民投资网\n",
      "removed 银通投资 by 中润银通投资(北京)有限公司\n",
      "removed 北京华澳融信 by 北京华澳融信（余盆网）\n",
      "removed 余盆网 by 北京华澳融信（余盆网）\n",
      "removed 三农金服 by 深圳三农金服\n",
      "removed 资易贷 by 资易贷（北京）金融信息服务有限公司旗\n",
      "removed 钱包 by 钱包网\n",
      "removed 卡宝 by 卡宝典\n",
      "removed 玖富 by 北京玖富集团\n",
      "removed 小资 by 小资钱包\n",
      "removed 嘉盛 by 嘉盛国际\n",
      "removed 时贷 by 大时贷\n",
      "removed 钱宝 by 钱宝财\n",
      "removed 渤海商品交易 by 天津渤海商品交易所\n",
      "removed 渤海商品交易 by 渤海商品交易所\n",
      "removed 渤海商品交易所 by 天津渤海商品交易所\n",
      "removed 中银消费金融 by 中银消费金融有限公司\n",
      "removed 中银 by 中银消费金融有限公司\n",
      "removed 中银 by 中银消费金融\n",
      "removed 富民投资 by 富民投资网\n",
      "removed 宜贷网 by ?宜贷网\n",
      "removed 米咖网 by nine\n",
      "removed 国投信达集团 by nine\n",
      "removed 国信中融 by nine\n",
      "removed 小葱 by nine\n",
      "removed 乐金所 by nine\n",
      "removed 元宝365 by nine\n",
      "removed 工银金融 by nine\n",
      "removed 众可贷 by nine\n",
      "removed 秒钱 by nine\n",
      "removed 借贷宝 by nine\n",
      "removed 中赢金融 by nine\n",
      "removed 余额宝 by nine\n",
      "removed 诺诺镑客 by nine\n",
      "removed 上海骏合金融信息服务有限公司 by nine\n",
      "removed 微信 by nine\n",
      "removed 招联金融 by nine\n",
      "removed 蚂蚁借呗 by nine\n",
      "removed 财付通 by nine\n",
      "removed 速贷网 by nine\n",
      "removed 君享金融 by nine\n",
      "removed 聚才道 by nine\n",
      "removed kci by nine\n",
      "removed 罗麦科技 by nine\n",
      "removed 芒果金融 by nine\n",
      "removed 优品 by nine\n",
      "removed 惠农贷 by nine\n",
      "removed 龙网 by nine\n",
      "removed 华金融 by nine\n",
      "removed 借呗 by nine\n",
      "removed 泽星投资 by nine\n",
      "removed 365金融 by nine\n",
      "removed 浙商银行 by nine\n",
      "removed 盒子支付 by nine\n",
      "removed 宜信 by nine\n",
      "removed 小额贷 by nine\n",
      "removed 云金融 by nine\n",
      "removed 华融信 by nine\n",
      "removed 盒子支付 by nine\n",
      "removed 云端金融 by nine\n",
      "removed 高新投 by nine\n",
      "removed 工银金融 by nine\n",
      "removed 众可贷 by nine\n",
      "removed 金道 by nine\n",
      "removed 贵金属 by nine\n",
      "removed 大宗商品交易 by nine\n",
      "removed 徽商银行 by nine\n",
      "removed Ｅ镑客 by nine\n",
      "removed fomo3d by nine\n",
      "removed 中赢金融 by nine\n",
      "removed 火币 by nine\n",
      "removed 光汇云油 by nine\n",
      "removed 金盛二元期权 by nine\n",
      "removed fomo3d by nine\n",
      "removed 砍柴网 by nine\n",
      "removed 众可贷 by nine\n",
      "removed 乐金所 by nine\n",
      "removed 诺诺镑客 by nine\n",
      "removed 工银金融 by nine\n",
      "removed 众可贷 by nine\n",
      "removed 泽星投资 by nine\n",
      "removed 懒财宝 by nine\n",
      "removed 联安贷 by nine\n",
      "removed 中泰证券 by nine\n",
      "removed 中泰国际 by nine\n",
      "removed 汇付天下 by nine\n",
      "removed 借贷宝 by nine\n",
      "removed 无忧车贷 by nine\n",
      "removed 及贷 by nine\n",
      "removed 多米 by nine\n",
      "removed 轻松筹 by nine\n",
      "removed 余额宝 by nine\n",
      "removed 宜信 by nine\n",
      "removed 工商银行 by nine\n",
      "removed 中业兴融 by nine\n",
      "removed 大宗商品交易 by nine\n",
      "removed 海南如意岛公司 by nine\n",
      "removed 宜信 by nine\n",
      "removed 乐金所 by nine\n",
      "removed 盒子支付 by nine\n",
      "removed 宜信 by nine\n",
      "removed 南京银行 by nine\n",
      "removed 火币 by nine\n",
      "removed 米咖网 by nine\n",
      "removed 网乐贷 by nine\n",
      "removed 洋钱罐 by nine\n",
      "removed 和耕传承基金 by nine\n",
      "removed 贵金属 by nine\n",
      "removed 杭州满溢网络科技有限公司 by nine\n",
      "removed 普顿 by nine\n",
      "removed 沪深理财 by nine\n",
      "removed 景云金融 by nine\n",
      "removed fomo3d by nine\n",
      "removed 小花钱包 by nine\n",
      "removed 微信 by nine\n",
      "removed 稳盈宝 by nine\n",
      "removed 诺诺镑客 by nine\n",
      "removed 发隆金融 by nine\n",
      "removed 金贝 by nine\n",
      "removed 宜信 by nine\n",
      "removed 投客网 by nine\n",
      "removed 汇投资 by nine\n"
     ]
    }
   ],
   "source": [
    "# Normalise key entities in three passes: resolve pairs through trans_map,\n",
    "# drop names contained in longer ones, then apply remove_nine.\n",
    "normalised_keys = rs_df['key_entity'].map(lambda x: trans_keys(trans_map, x))\n",
    "normalised_keys = normalised_keys.map(remove_short_entity_by_long)\n",
    "rs_df['key_entity'] = normalised_keys.map(remove_nine)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows predicted as negative.\n",
    "is_negative = rs_df['negative'].eq(1)\n",
    "negatives = rs_df.loc[is_negative]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Negative rows whose key_entity is a float (i.e. NaN rather than a string).\n",
    "# .copy() makes this an independent frame, so adding columns to it later\n",
    "# does not trigger SettingWithCopyWarning.\n",
    "negative_nan = negatives[negatives['key_entity'].map(lambda x:isinstance(x,float))].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/njuciairs/anaconda3/envs/tftorch/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "# Attach each row's 'entity' value from test_df, matched on id.\n",
    "# A single id -> entity lookup (first occurrence per id) replaces a full scan of\n",
    "# test_df for every row; ids absent from test_df yield NaN instead of an IndexError.\n",
    "# .assign builds a new frame rather than writing into a slice (SettingWithCopyWarning).\n",
    "entity_by_id = test_df.drop_duplicates('id').set_index('id')['entity']\n",
    "negative_nan = negative_nan.assign(entity=negative_nan['id'].map(entity_by_id))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/njuciairs/anaconda3/envs/tftorch/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>negative</th>\n",
       "      <th>key_entity</th>\n",
       "      <th>entity</th>\n",
       "      <th>log</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>84</td>\n",
       "      <td>042c0a9e</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>国投信达集团;国信中融</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>146</td>\n",
       "      <td>077a7f27</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>支付宝;蚂蚁金服;钱吧</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>240</td>\n",
       "      <td>0bc31991</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>工银金融;众可贷</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>266</td>\n",
       "      <td>0d21f2cb</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>新华财富</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>290</td>\n",
       "      <td>0e8c0f14</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>秒钱;借钱</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4440</td>\n",
       "      <td>e55a3377</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>支付宝;微信</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4526</td>\n",
       "      <td>e9abc6b0</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>蚂蚁金服;花呗;大学生贷</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4788</td>\n",
       "      <td>f6036994</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>宜信</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4856</td>\n",
       "      <td>f93cbbc2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>爱公益</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4992</td>\n",
       "      <td>ff9315ec</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>大家赚;块钱;零花钱;汇投资</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>70 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id  negative key_entity          entity  log\n",
       "84    042c0a9e         1        NaN     国投信达集团;国信中融    0\n",
       "146   077a7f27         1        NaN     支付宝;蚂蚁金服;钱吧    0\n",
       "240   0bc31991         1        NaN        工银金融;众可贷    0\n",
       "266   0d21f2cb         1        NaN            新华财富    0\n",
       "290   0e8c0f14         1        NaN           秒钱;借钱    0\n",
       "...        ...       ...        ...             ...  ...\n",
       "4440  e55a3377         1        NaN          支付宝;微信    0\n",
       "4526  e9abc6b0         1        NaN    蚂蚁金服;花呗;大学生贷    0\n",
       "4788  f6036994         1        NaN              宜信    0\n",
       "4856  f93cbbc2         1        NaN             爱公益    0\n",
       "4992  ff9315ec         1        NaN  大家赚;块钱;零花钱;汇投资    0\n",
       "\n",
       "[70 rows x 5 columns]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add a constant 'log' column of zeros; .assign returns a new frame, which avoids\n",
    "# the SettingWithCopyWarning raised when writing a column into a slice.\n",
    "negative_nan = negative_nan.assign(log=0)\n",
    "negative_nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "【10-06分秒必赚聊闲】目前春晚推荐的，秒钱理财，2016---2018连续三年广告，本人受害8万，是15000余名受害者之一，，2018-8-15开始逾期宣布，腾讯qq红包推荐，可是突然宣布逾期，平台负责人推卸责任，把法人代表更换了多次，2017-8月就开始换人，到2018-8-15，这是提前预备好骗老百姓钱的，侵吞了我们老百姓的看病钱，养老钱，秒钱理财，郭龙欣，李岩，合作人河南经协供应链有限公司大股东王亮孙国防洗黑钱利用河南的商户开户，自融我们老百姓的出借钱，目前王亮还在逍遥法外，谁来为我们老百姓做主啊，，救救我们啊，请给我们老百姓还本金吧，不要在无良知下去啦\n",
      "1076    秒钱;借钱\n",
      "Name: entity, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Inspect the text and the 'entity' value of test sample 0e8c0f14.\n",
    "sample = test_df[test_df['id']=='0e8c0f14']\n",
    "print(sample['text'].values[0])\n",
    "print(sample['entity'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "每当我看到有大学生贷这个我就？还有卖卵广告 既然它做到学校里了 就说明真的有女学生去卖卵 我真是求求了 爱惜自己好吗？//@马玉兰还在害人:……我一直以为没啥学生会去贷那种高利贷（这种算不算高利贷啊？完全不了解。我只知道花呗……。\n",
      "789    蚂蚁金服;花呗;大学生贷\n",
      "Name: entity, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Inspect the text and the 'entity' value of test sample e9abc6b0.\n",
    "sample = test_df[test_df['id']=='e9abc6b0']\n",
    "print(sample['text'].values[0])\n",
    "print(sample['entity'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the current rs_df to CSV without the index column.\n",
    "rs_df.to_csv('evaluation/tmp/multi_class_cross1-9_1013.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export a variant whose key_entity is the placeholder 'ssss' for every row.\n",
    "# .assign returns a new frame, so the real key_entity values in rs_df are kept;\n",
    "# assigning the placeholder in place would make every later cell (including the\n",
    "# later to_csv exports) see 'ssss' instead of the predictions.\n",
    "rs_df.assign(key_entity='ssss').to_csv('evaluation/tmp/entity1.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9728144750000001"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): presumably rescales a score component by its 0.4 weight - confirm the source of 0.38912579.\n",
    "0.38912579000 /0.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9390498833333333"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): presumably the remainder of a 0.95255572 total after removing 0.38912579,\n",
    "# rescaled by a 0.6 weight - confirm where both numbers come from.\n",
    "(0.95255572000 - 0.38912579000)/0.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.964390925"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): presumably rescales a score component by its 0.4 weight - confirm the source of 0.38575637.\n",
    "0.38575637000  / 0.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.924355"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): presumably the remainder of a 0.94036937 total after removing 0.38575637,\n",
    "# rescaled by a 0.6 weight - confirm where both numbers come from.\n",
    "(0.94036937000 - 0.38575637000) /0.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "178.04537499999995"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 0.964390925 is the displayed result of 0.38575637 / 0.4.\n",
    "# NOTE(review): presumably estimates how many of 5000 rows are wrong at that rate - confirm.\n",
    "5000 * (1-0.964390925)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.924355"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): presumably the remainder of a 0.94036937 total after removing 0.38575637,\n",
    "# rescaled by a 0.6 weight - confirm where both numbers come from.\n",
    "(0.94036937000 - 0.38575637000)/0.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose key_entity is blank after stripping: set negative to 0 and key_entity to NaN.\n",
    "blank_key = rs_df['key_entity'].map(lambda x: str(x).strip() == '')\n",
    "rs_df.loc[blank_key, 'negative'] = 0\n",
    "rs_df.loc[blank_key, 'key_entity'] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the current rs_df to CSV without the index column.\n",
    "rs_df.to_csv('evaluation/tmp/entity3_20191004.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>negative</th>\n",
       "      <th>key_entity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [id, negative, key_entity]\n",
       "Index: []"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows whose key_entity is an empty or whitespace-only string.\n",
    "is_blank_key = rs_df['key_entity'].map(lambda x: str(x).strip() == '')\n",
    "space_sub = rs_df.loc[is_blank_key]\n",
    "space_sub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>negative</th>\n",
       "      <th>predict</th>\n",
       "      <th>entity_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>790</td>\n",
       "      <td>e9abc6b0</td>\n",
       "      <td>1</td>\n",
       "      <td>[0, 0, 0]</td>\n",
       "      <td>['蚂蚁金服', '花呗', '大学生贷']</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           id  negative    predict             entity_list\n",
       "790  e9abc6b0         1  [0, 0, 0]  ['蚂蚁金服', '花呗', '大学生贷']"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_df[raw_df['id']=='e9abc6b0']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  以下是对结果的分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_df = load_basic_dataset(split ='test')\n",
    "raw_df = load_model_rs(model_name='BertSentiEntity',version_id=1)\n",
    "raw_rs_df = reduce_rs_by_id(raw_df)\n",
    "raw_rs_df['key_entity'] = raw_rs_df['key_entity'].map(remove_short_entity_by_long)\n",
    "a = raw_rs_df[raw_rs_df.id.isin(space_sub['id'].values)].sort_values('id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "b = test_df[test_df.id.isin(space_sub['id'].values)].sort_values('id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "37"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "len(torch.Tensor([1,2,3]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([3])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = torch.Tensor([1,2,3])\n",
    "b = torch.Tensor([1,0,3])\n",
    "a.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dsdc\n",
      "yes\n"
     ]
    }
   ],
   "source": [
    "\n",
    "class TestDataset(Dataset):\n",
    "    def __init__(self, df, max_len=300):\n",
    "        self.x = list(self.make_samples(df, max_len))\n",
    "        self.len = len(self.samples)\n",
    "\n",
    "    def make_samples(self, df, max_len):\n",
    "        texts = df['text'].values\n",
    "        tiltles = df['title'].values\n",
    "        estrs = df['entity'].values\n",
    "        ids = df['id'].values\n",
    "        for i, estr in enumerate(estrs):\n",
    "            for e in estr.split(';'):\n",
    "                yield TextEntitySample(ids[i], texts[i], tiltles[i], e, max_len)\n",
    "\n",
    "    def __getitem__(self, index):\n",
    "        return self.samples[index]\n",
    "\n",
    "    def __len__(self):\n",
    "        return self.len"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
